set rmsg on
set more off

************************************************************************
*
*	NOTE:		This do-file uses as input the merged samples of estimates 
*               of the model's parameters and the Compustat sample. The 
*               outputs are the descriptive Tables 3, 4, 5, 6 and 11
*
*************************************************************************

capture program drop _all
clear
clear matrix


* Define paths

	global 	data	"..."
		
	

clear
clear matrix
	
**  TABLE 3 **
**  0. Prepare data
	use "$data/GMMSV_reg_sample.dta", clear
	* This is the same merged data base used for the regression analysis is Tables 8,9 and 10
	
	merge 1:1 gvkey fyear using "$data/markups_by_gvkey.dta"
	* The markups data contains the variable markup_w, which is the estimate of firm-year markups following DeLoecker, Eeckhout ,and Unger (QJE 2020)
	drop if _merge == 2
	drop _merge
	
	merge 1:m gvkey fyear using "$data/kpss_by_gvkey.dta"
	* This file contains the firm-year patents counts (numpats) and market values (patvalue_mkt) obtained from Kogan, Papanikolaou, Seru, and Stoffman, (QJE 2017)
	drop if _merge == 2
	
**  1. Download Fama-French industries and market shares
	foreach Nind of numlist 10 12 17 30 38 {
		ffind sic4, newvar(ffind`Nind') type(`Nind')
	}
**  2.  Compute variables or ratios 
    g sales_at = sale/at
    g earnings_at = (ni + xint + txt)/at
   
    replace numpats      = 0 if _merge == 1 & numpats      == .
    replace patvalue_cw  = 0 if _merge == 1 & patvalue_cw  == .
    replace patvalue_mkt = 0 if _merge == 1 & patvalue_mkt == .
   
    g lncap_lab = ln(cap_lab)
    g lnnumpats = ln(numpats+1)
    g lnmktpats = ln(patvalue_mkt+1)

**	3. Tabulate standard deviation decompositions
	global outcomevars "sales_at roa sale_growth"
	global policyvars  "cash_atl1 cash_savings_at net_equity_issue_at creditline_at capex_at td_at rd_sale"
	global leadervars  "markup_w lnnumpats lnmktpats"
	global techvars    "lncap_lab op_lev"

	* Total standard deviation and number of observations
	estpost tabstat $techvars $outcomevars $policyvars $leadervars, stats(n me sd p5 p25 med p75 p95) col(stats)	
	local Tabsumvars = e(vars)
	matrix Tabsum1 = [e(count)',e(sd)']	
		
	* Within and between standard deviations by estimation groups (grouping variable is variable is sic3_by_g_cf5_lagat)	
	matrix SD_all = J(16,2,0)
	foreach var in sic3 sic3_by_g_cf5_lagat sic4 {
		egen firms_`var' = count(gvkey), by(`var' fyear)
		xtset `var'
		matrix SDs = [0,0]	
		foreach var of local Tabsumvars {
			xtsum `var'
			scalar sdb = r(sd_b)
			scalar sdw = r(sd_w)
			* Matrix join
			matrix SDs = [SDs \ sdb, sdw]
		}
		matrix SD_all = [SD_all, SDs]
	}
	
	* Within and between standard deviations byFama-French industry definitions
	foreach Nind of numlist 10 12 17 30 38 {
		egen firms_`Nind' = count(gvkey), by(ffind`Nind' fyear)
		xtset ffind`Nind'
		matrix SDs = [0,0]	
		foreach var of local Tabsumvars {
			xtsum `var'
			scalar sdb = r(sd_b)
			scalar sdw = r(sd_w)
			* Matrix join
			matrix SDs = [SDs \ sdb, sdw]
		}
		matrix SD_all = [SD_all, SDs]
	}
	
	
	matrix Tabsum1 = [Tabsum1,SD_all[2..16,3..18]]
	
	* Extract between- and within- standard deviations by SIC4, FF17 and by estimation groups
	matrix Tabsum2 = [Tabsum1[1...,1..2],Tabsum1[1...,7],Tabsum1[1...,13],Tabsum1[1...,5], ///
	                                      Tabsum1[1...,8],Tabsum1[1...,14],Tabsum1[1...,6]]
	                                     
	
	matrix list Tabsum2
	* Theo order of columns is: number of observations, total standard deviation, between std. devs (by SIC4, FF17, and groups, and finally within std. devs. (by SIC4, FF17, Groups.)
	
	
** TABLE 4 (TABLE 11)**	
**  0. Prepare data
	use "$data/GMMSV_reg_sample.dta", clear
	* This is the same merged data base used for the regression analysis is Tables 8,9 and 10
	* Note: this analysis can also be done directly from the file with estimates by gvkey (online)
	* This code will also produce Table 11
	
	collapse mu sigma_P alpha sigma_A rho sic3, by(sic3_by_g_cf5_lagat)

**  1. Panel A of the Table: main moments
	tabstat rho sigma_P sigma_A mu,  stats(n mean sd min p5 p25 p50 p75 p95 max) c(s)
	
*   Add within and between sic3 variation	
	estpost tabstat rho sigma_P sigma_A mu, stats(n me sd p5 p25 med p75 p95) col(stats)	
	local Tabsumvars = e(vars)
	matrix Tabsum1 = [e(count)', e(mean)', e(sd)', e(p5)', e(p25)', e(p50)', e(p75)',e(p95)']
	
	xtset sic3
	matrix SDs = [0,0]
	foreach var of local Tabsumvars {
		xtsum `var'
		scalar sdb = r(sd_b)
		scalar sdw = r(sd_w)
		* Matrix stack
		matrix SDs = [SDs \ sdb, sdw]
		}
	
	matrix Tabsum1 = [Tabsum1,SDs[2...,1..2]]
	matrix list Tabsum1 /* This is Panel A */
	
**  3. Panel C: correlations	
	pwcorr rho sigma_P sigma_A mu, sig 
	matrix Tabsum2 = [r(C),r(sig)]

	matrix list Tabsum2 /* This is panel C*/

	
** TABLE 5 ** 	
**  0. Prepare data
	use "$data/GMMSV_reg_sample.dta", clear
	* This is the same merged data base used for the regression analysis is Tables 8,9 and 10
	
**  1. Download Fama-French 17 industry definition
	foreach Nind of numlist 17 {
		ffind sic4, newvar(ffind`Nind') type(`Nind')
	}
		
** 2. Summarize rho by FF industries: Note 14 and 16 are not matched
	
	collapse (mean) rho sigma_P sigma_A mu ffind10 ffind12 ffind17 ffind30 ffind38 sic2 sic3 sic4, by(gvkey)

	sort ffind17 gvkey
	estpost tabstat rho, by(ffind17) statistics(n mean sd p5 p25 p50 p75 p95) columns(statistics) listwise
	
**  FIGURE 5: Data Export (Figure generated in Matlab by program Figure5RhoDistributionbyFF17.m)
	use "$data/GMMSV_reg_sample.dta", clear
	**  FamaFrench industries and market shares
	foreach Nind of numlist 10 12 17 30 38 {
		ffind sic4, newvar(ffind`Nind') type(`Nind')
	}

	collapse (mean) rho sigma_P sigma_A mu ffind10 ffind12 ffind17 ffind30 ffind38 sic2 sic3 sic4 rhopos, by(gvkey)
 
**  Extract now to cvs, to import in Matlab 
	sort ffind17 gvkey
	outsheet ffind10 ffind17 mu sigma_P sigma_A rho using "$data/EstimatesbygvkeyandFF17.csv", comma nolabel replace
	
**	TABLE 6 **	
**  1. Analysis by Sigma_A
	use "$data/GMMSV_reg_sample.dta", clear
	* This is the same merged data base used for the regression analysis is Tables 8,9 and 10
	
** Download FF industry 17
	foreach Nind of numlist 17 {
		egen earnings_at_`Nind' = mean(earnings_at), by(ffind`Nind' fyear)
		gen  beta_`Nind' = .
	}
	
**	Winsorize variables
	g leverage = td_at
	winsor2 naive_D leverage, replace cuts(1 99)	

	global rankvars "sigma_A cap_lab op_lev leverage aqc_at equity_vol asset_vol naive_D maturity_issue allindrawn_issue"
	
	
	foreach sicnumb of numlist 3 4 {
		foreach v of global rankvars {
			egen rank`v'_sic`sicnumb' = mean(`v'), by(sic`sicnumb' fyear) 
		}
	}	

	foreach Nind of numlist 17 {
		foreach v of global rankvars {
			egen rank`v'_`Nind' = mean(`v'), by(ffind`Nind' fyear)
		}
	}

	collapse rank*, by(gvkey)

	spearman ranksigma_A_sic3 rank*3	
		 matrix RANKSIGACORR = r(Rho)[3...,2]
		 
	spearman ranksigma_A_sic4 rank*4 
		 matrix RANKSIGACORR = (RANKSIGACORR, r(Rho)[3...,2])
		 
	foreach Nind of numlist 17 {
		spearman ranksigma_A_`Nind' rank*_`Nind'
		matrix RANKSIGACORR = (RANKSIGACORR, r(Rho)[3...,2])
	}		
	
**  2. Analysis by Sigma_P
	use "$data/GMMSV_reg_sample.dta", clear
	* This is the same merged data base used for the regression analysis is Tables 8,9 and 10	
	
**  Download FamaFrench industries and market shares
	foreach Nind of numlist 10 12 17 30 38 {
		ffind sic4, newvar(ffind`Nind') type(`Nind')
	}
	
**  Winsorize variables
 	g leverage = td_at
	winsor2 naive_D leverage, replace cuts(1 99)		

	global rankvars "sigma_P cap_lab op_lev leverage aqc_at equity_vol asset_vol naive_D maturity_issue allindrawn_issue "


	foreach sicnumb of numlist 3 4 {
		foreach v of global rankvars {
			egen rank`v'_sic`sicnumb' = mean(`v'), by(sic`sicnumb' fyear) 
		}
	}	

	foreach Nind of numlist 17 {
		foreach v of global rankvars {
			egen rank`v'_`Nind' = mean(`v'), by(ffind`Nind' fyear)
		}
	}

	collapse rank*, by(gvkey)

	spearman ranksigma_P_sic3 rank*3	
		 matrix RANKSIGPCORR = r(Rho)[3...,2]
		 
	spearman ranksigma_P_sic4 rank*4 
		 matrix RANKSIGPCORR = (RANKSIGPCORR, r(Rho)[3...,2])
		 
	foreach Nind of numlist 17 {
		spearman ranksigma_P_`Nind' rank*_`Nind'
		matrix RANKSIGPCORR = (RANKSIGPCORR, r(Rho)[3...,2])
	}		
	
**  3. Analysis by rho
	use "$data/GMMSV_reg_sample.dta", clear
	* This is the same merged data base used for the regression analysis is Tables 8,9 and 10	
	
**  Download FamaFrench industries and market shares
	foreach Nind of numlist 10 12 17 30 38 {
		ffind sic4, newvar(ffind`Nind') type(`Nind')
	}
	
**  Winsorize variables
 	g leverage = td_at
	winsor2 naive_D leverage, replace cuts(1 99)		

	global rankvars "rho cap_lab op_lev leverage aqc_at equity_vol asset_vol naive_D maturity_issue allindrawn_issue "	
	
		foreach sicnumb of numlist 3 4 {
		foreach v of global rankvars {
			egen rank`v'_sic`sicnumb' = rank(`v'), by(sic`sicnumb' fyear) field
		}
	}	

	foreach Nind of numlist 17 {
		foreach v of global rankvars {
			egen rank`v'_`Nind' = rank(`v'), by(ffind`Nind' fyear) field
		}
	}

	collapse rank*, by(gvkey)

	spearman rankrho_sic3 rank*3	
		 matrix RANKRHOCORR = r(Rho)[3...,2]
		 
	spearman rankrho_sic4 rank*4 
		 matrix RANKRHOCORR = (RANKRHOCORR, r(Rho)[3...,2])
		 
	foreach Nind of numlist 17 {
		spearman rankrho_`Nind' rank*_`Nind'
		matrix RANKRHOCORR = (RANKRHOCORR, r(Rho)[3...,2])
	}		 
	
	* Panel for Within-Industry average correlations with Sigma_A
	matrix list RANKSIGACORR
	* Panel for Within-Industry average correlations with Sigma_P
	matrix list RANKSIGPCORR	
	* Panel for Within-Industry average correlations with rho
	matrix list RANKRHOCORR
	

